

# Clean: build the student-level analysis table from data_student_raw.
data_student <- data_student_raw %>%
  dplyr::select(
    Cod_SIIIR_hs_adm, unitate_de_invatamant, liceu_repartizat,
    Cod_SIRUTA2_hs_adm, entrance_perc, dec, grad_perc, an_bac
  ) %>%
  mutate(
    # entrance_perc scaled by 100 and truncated to a whole number.
    p = trunc(entrance_perc * 100),
    # NOTE(review): p_exam is computed exactly like p; confirm whether a
    # different input column was intended.
    p_exam = trunc(entrance_perc * 100),
    # Twenty bins of entrance_perc with cut points every 0.05, labelled 1..20.
    v = cut(
      entrance_perc,
      breaks = c(-Inf, seq(from = 0.05, to = 0.95, by = 0.05), Inf),
      labels = 1:20
    )
  ) %>%
  # n_hs: number of distinct non-missing Cod_SIIIR_hs_adm values within each
  # (an_bac, Cod_SIRUTA2_hs_adm) group.
  group_by(an_bac, Cod_SIRUTA2_hs_adm) %>%
  mutate(n_hs = n_distinct(Cod_SIIIR_hs_adm[!is.na(Cod_SIIIR_hs_adm)])) %>%
  ungroup() %>%
  # Keep rows with at least one counted code and a non-missing dec.
  filter(n_hs > 0, !is.na(dec)) %>%
  mutate(
    # Size classes of n_hs: 1, 2, 3, 4-15 and 16 or more.
    n_hs_town_group = cut(
      n_hs,
      breaks = c(-Inf, 1, 2, 3, 15, Inf),
      labels = c("1", "2", "3", "4-15", "16+")
    ),
    # Zero-based position of the size class: 0 for "1" up to 4 for "16+"
    # (the factor's level code minus one).
    n_graph = as.numeric(n_hs_town_group) - 1,
    # Spread the five classes evenly over [-0.25, 0.25] around the numeric
    # value of dec (for a factor, as.numeric() yields the level code).
    n_graph = n_graph / 4 * 0.5 - 2 / 4 * 0.5 + as.numeric(dec)
  )
  


# Means by admission decile: one row per n_hs_town_group x dec combination
# (n_graph is carried along as a grouping column).
# Reads data_student, the table built by the cleaning step in this script; the
# previously referenced data_transform is not defined anywhere in this file,
# so the script failed in a fresh session.
# Columns produced:
#   mean      - mean of grad_perc (missing values ignored)
#   sd        - standard deviation of grad_perc (missing values ignored)
#   n         - number of rows in the group
#   mean_entr - mean of entrance_perc (missing values ignored)
summary <- data_student %>%
  group_by(n_hs_town_group, n_graph, dec) %>%
  summarise(
    mean = mean(grad_perc, na.rm = TRUE),
    sd = sd(grad_perc, na.rm = TRUE),
    n = n(),
    mean_entr = mean(entrance_perc, na.rm = TRUE)
  ) %>%
  ungroup()

# Plot of mean * 100 against dec, with points and a connecting line per
# n_hs_town_group. The aesthetics shared by both layers live in ggplot().
g <- ggplot(
  summary,
  aes(x = dec, y = mean * 100, color = n_hs_town_group)
) +
  geom_point(size = 2.5, alpha = 1) +
  geom_line(aes(group = n_hs_town_group), alpha = 1) +
  scale_x_discrete(breaks = 0:10) +
  # The y axis is restricted to 20-90; ggplot2 drops values outside the limits.
  scale_y_continuous(
    breaks = seq(from = 0, to = 100, by = 10),
    limits = c(20, 90)
  ) +
  labs(
    x = "Own Admission Score Decile (National-Cohort)",
    y = "Own Graduation Score Percentile (National-Cohort)",
    color = str_wrap("Number of High Schools in Town", width = 10)
  ) +
  # NOTE(review): recent ggplot2 releases deprecate legend.title.align;
  # confirm the installed version before replacing it.
  theme(
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    legend.title = element_text(size = 14),
    legend.text = element_text(size = 18),
    legend.title.align = 0.5
  )
g


# Gap in the `mean` column of `summary` between two n_hs_town_group values
# within one dec value, computed as (group_a - group_b) * 100.
# Returns the same one-column data frame the inline expressions produced.
decile_gap <- function(group_a, group_b, decile) {
  mean_a <- summary %>%
    filter(n_hs_town_group == group_a & dec == decile) %>%
    dplyr::select(mean)
  mean_b <- summary %>%
    filter(n_hs_town_group == group_b & dec == decile) %>%
    dplyr::select(mean)
  (mean_a - mean_b) * 100
}

# 1st decile:
decile_gap(1, "16+", 1)

decile_gap(1, "2", 1)

# NOTE(review): same comparison as the first 1st-decile gap; confirm whether a
# different pair of groups was intended.
decile_gap(1, "16+", 1)

# 10th decile
decile_gap(1, "16+", 10)

# Note the reversed direction here: group "2" minus group "1".
decile_gap("2", "1", 10)


# NOTE(review): same comparison as the first 10th-decile gap; confirm whether
# a different pair of groups was intended.
decile_gap(1, "16+", 10)



# Figure 02: write the colour and greyscale versions of `g` as PDFs into the
# folder of the document currently active in RStudio.
current_path <- rstudioapi::getActiveDocumentContext()$path
setwd(dirname(current_path))

# print() is called explicitly: a bare plot object is only drawn through
# top-level auto-printing, which does not happen when the script is run with
# source() without echo, leaving the PDF without a page.
pdf("04_figure_02_color.pdf", width = 8, height = 5)
print(g)
dev.off()

# Greyscale variant: same plot with the grey colour/fill scales swapped in.
pdf("04_figure_02.pdf", width = 8, height = 5)
print(g + scale_color_grey() + scale_fill_grey())
dev.off()

# Figure A.1.1 (appendix): same summary statistics as the decile table, but
# grouped by the whole-number admission percentile p instead of dec.
# Reads data_student, the table built by the cleaning step in this script; the
# previously referenced data_transform is not defined anywhere in this file,
# so the script failed in a fresh session.
summary_p <- data_student %>%
  group_by(n_hs_town_group, n_graph, p) %>%
  summarise(
    mean = mean(grad_perc, na.rm = TRUE),
    sd = sd(grad_perc, na.rm = TRUE),
    n = n(),
    mean_entr = mean(entrance_perc, na.rm = TRUE)
  ) %>%
  ungroup()

# Write the percentile-level appendix figure as a PDF into the folder of the
# document currently active in RStudio.
current_path <- rstudioapi::getActiveDocumentContext()$path
setwd(dirname(current_path))

# One line per n_hs_town_group: mean * 100 against the admission percentile p.
# NOTE(review): recent ggplot2 releases deprecate `size` for lines and
# legend.title.align; confirm the installed version before replacing them.
p_plot <- ggplot(summary_p) +
  geom_line(
    aes(x = p, y = mean * 100, color = n_hs_town_group, group = n_hs_town_group),
    alpha = 1, size = 1
  ) +
  xlab("Own Admission Score Percentile (National-Cohort)") +
  ylab("Own Graduation Score Percentile (National-Cohort)") +
  scale_x_continuous(breaks = seq(from = 0, to = 100, by = 10)) +
  scale_y_continuous(
    breaks = seq(from = 0, to = 100, by = 10),
    limits = c(20, 100)
  ) +
  labs(color = str_wrap("Number of High Schools in Town", width = 10)) +
  theme(
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    legend.title = element_text(size = 14),
    legend.text = element_text(size = 18),
    legend.title.align = 0.5
  )

# print() is called explicitly: a bare plot expression is only drawn through
# top-level auto-printing, which does not happen when the script is run with
# source() without echo, leaving the PDF without a page.
pdf("04_figure_02_percentile_appendix.pdf", width = 8, height = 5)
print(p_plot)
dev.off()






